1 Introduction

This markdown is for analyzing the Allen Brain Atlas MTG (middle temporal gyrus) dataset. Some details of their analysis is provided here. Briefly, these data were generated using SMART-Seq v4 Ultra Low Input RNA Kit, which is an improved version of SMART-seq2 protocol.

This pipeline using some alternative strategies for data processing and analysis, mostly based on bioconductor workflows for scRNAseq.

The packages required for the analysis are as follows: - scater: collection of tools for doing quality control analyses of scRNA-seq - scran: methods provide normalization of cell-specific biases, correcting batch effects, identify marker genes - SC3: package for single cell consensus clustering.

2 Obtaining/Loading Counts

2.1 Loading libraries.

bio_packs = c("SingleCellExperiment","biomaRt","scater","scran","SC3")

for(pack1 in bio_packs){
  if( !pack1 %in% installed.packages()[,"Package"]){
    source("https://bioconductor.org/biocLite.R")
    biocLite(pack1, suppressUpdates = TRUE)
  }
}

cran_packs = c("data.table", "svd", "Rtsne")

for(pack1 in cran_packs){
  if( !pack1 %in% installed.packages()[,"Package"]){
    install.packages(pack1)
  }
}

library(SingleCellExperiment)
library(scater)
library(scran)
library(limma)
library(data.table)
library(svd)
library(Rtsne)
library(SC3)

source("SOURCE.R")

Before runing this R markdown, please first download the datafile human_MTG_gene_expression_matrices_2018-06-14.zip from here to your local computer, and unzip the file. Then in the following code chunk, set the working directory to be the directory of the unzipped folder.

You can modify the following code to determine if the analysis will be conducted with exon counts only(exon_only = TRUE) or with both exon and intron counts summed together (exon_only = FALSE). We have requested 7 cores (num_cores = 7) to improve SC3 runtime.

work_dir = "~/research/scRNAseq/data/Allen_BI/"
work_dir = paste0(work_dir, "human_MTG_gene_expression_matrices_2018-06-14")
work_dir = normalizePath(work_dir)

exon_only = TRUE
num_cores = 7

exons_fn    = "human_MTG_2018-06-14_exon-matrix.csv"
introns_fn  = "human_MTG_2018-06-14_intron-matrix.csv"

The following code read-in count data. To be compatible with other studies that only use count data from exonic regions, here we just read the exon counts.

counts = fread(file.path(work_dir, exons_fn), data.table=FALSE)
dim(counts)
## [1] 50281 15929
counts[1:2,1:2]
##       V1 F1S4_160106_001_B01
## 1 353007                   0
## 2 353008                   0
rownames(counts) = counts$V1
counts = as.matrix(counts[,-1])

if(! exon_only) {
    intron_counts = fread(file.path(work_dir, introns_fn), data.table=FALSE)
  rownames(intron_counts) = intron_counts$V1
    intron_counts = as.matrix(intron_counts[,-1])
    counts = counts + intron_counts
}

Next we add the sample/cell information and gene information. Spike-in is used for this data, though in this final count matrix, the spike-in’s are not included.

cell_data=fread(file.path(work_dir,"human_MTG_2018-06-14_samples-columns.csv"))

dim(cell_data)
## [1] 15928    34
cell_data[1:2,]
##            sample_name sample_id sample_type     organism     donor sex
## 1: F1S4_160106_001_B01 556012415      Nuclei Homo sapiens H200.1030   M
## 2: F1S4_160106_001_C01 556012410      Nuclei Homo sapiens H200.1030   M
##    age_days brain_hemisphere brain_region brain_subregion facs_date
## 1:    19710                L          MTG              L5  1/6/2016
## 2:    19710                L          MTG              L5  1/6/2016
##     facs_container facs_sort_criteria rna_amplification_set
## 1: F1S4_160106_001      NeuN-positive        A8S4_160401_02
## 2: F1S4_160106_001      NeuN-positive        A8S4_160401_02
##    library_prep_set library_prep_avg_size_bp           seq_name seq_tube
## 1:   L8S4_160406_02                      429 LS-15051_S02_E1-50 LS-15051
## 2:   L8S4_160406_02                      448 LS-15051_S03_E1-50 LS-15051
##        seq_batch total_reads percent_exon_reads percent_intron_reads
## 1: R8S4-160411-H     2572946           38.57718             36.84679
## 2: R8S4-160411-H     2755839           30.59417             44.61727
##    percent_intergenic_reads percent_rrna_reads percent_mt_exon_reads
## 1:                 10.14973                  0            0.09005241
## 2:                 12.40834                  0            0.05247041
##    percent_reads_unique percent_synth_reads percent_ecoli_reads
## 1:             85.57370         0.007534554         0.018536728
## 2:             87.61978         0.002790439         0.007264575
##    percent_aligned_reads_total complexity_cg genes_detected_cpm_criterion
## 1:                    92.41888     0.3166372                         8635
## 2:                    94.00132     0.2879887                        11697
##    genes_detected_fpkm_criterion         class             cluster
## 1:                          5253     GABAergic Inh L4-6 SST B3GAT2
## 2:                          8246 Glutamatergic Exc L5-6 RORB TTC12
table(cell_data$class)
## 
##     GABAergic Glutamatergic      no class  Non-neuronal 
##          4164         10525           325           914
table(cell_data$cluster)
## 
##    Astro L1-2 FGFR3 GFAP Astro L1-6 FGFR3 SLC14A1        Endo L2-6 NOSTRIN 
##                       61                      230                        9 
##         Exc L2 LAMP5 LTK Exc L2-3 LINC00507 FREM3 Exc L2-4 LINC00507 GLP2R 
##                      812                     2284                      170 
##    Exc L3-4 RORB CARM1P1    Exc L3-5 RORB COL22A1       Exc L3-5 RORB ESR1 
##                      280                      160                     1428 
##    Exc L3-5 RORB FILIP1L     Exc L3-5 RORB TWIST2     Exc L4-5 FEZF2 SCN4B 
##                      153                       93                       25 
##      Exc L4-5 RORB DAPK2     Exc L4-5 RORB FOLH1B      Exc L4-6 FEZF2 IL26 
##                      173                      870                      344 
##        Exc L4-6 RORB C1R     Exc L4-6 RORB SEMA3E       Exc L5-6 FEZF2 ABO 
##                      160                      777                      373 
##  Exc L5-6 FEZF2 EFTUD1P1      Exc L5-6 RORB TTC12    Exc L5-6 SLC17A7 IL15 
##                      314                      167                       56 
##    Exc L5-6 THEMIS C1QL3   Exc L5-6 THEMIS CRABP1  Exc L5-6 THEMIS DCSTAMP 
##                     1537                      147                       53 
##    Exc L5-6 THEMIS FGF10       Exc L6 FEZF2 OR2T8      Exc L6 FEZF2 SCUBE1 
##                       78                       19                       52 
##        Inh L1 SST CHRNA4          Inh L1 SST NMBR       Inh L1-2 GAD1 MC4R 
##                       52                      283                      107 
##       Inh L1-2 LAMP5 DBP      Inh L1-2 PAX6 CDH12  Inh L1-2 PAX6 TNFAIP8L3 
##                       21                       90                       16 
##       Inh L1-2 SST BAGE2         Inh L1-2 VIP LBH      Inh L1-2 VIP PCDH20 
##                      108                       47                       61 
##     Inh L1-2 VIP TSPAN12       Inh L1-3 PAX6 SYT6       Inh L1-3 SST CALB1 
##                       42                       29                      279 
##    Inh L1-3 VIP ADAMTSL1     Inh L1-3 VIP CCDC184       Inh L1-3 VIP CHRM2 
##                       72                       64                      175 
##         Inh L1-3 VIP GGH      Inh L1-4 LAMP5 LCP2      Inh L1-4 VIP CHRNA6 
##                       68                      356                       25 
##       Inh L1-4 VIP OPRM1        Inh L1-4 VIP PENK       Inh L2-3 VIP CASC6 
##                       52                       17                       45 
##     Inh L2-4 PVALB WFDC2        Inh L2-4 SST FRZB       Inh L2-4 VIP CBLN1 
##                      387                       64                       67 
##      Inh L2-4 VIP SPAG17    Inh L2-5 PVALB SCUBE3    Inh L2-5 VIP SERPINF1 
##                       33                       32                       55 
##         Inh L2-5 VIP TYR       Inh L2-6 LAMP5 CA1        Inh L2-6 VIP QPCT 
##                       62                      256                       37 
##      Inh L3-5 SST ADGRG6        Inh L3-6 SST HPGD         Inh L3-6 SST NPY 
##                      132                       60                       15 
##    Inh L3-6 VIP HS3ST3A1      Inh L4-5 PVALB MEPE      Inh L4-5 SST STK32A 
##                       80                       64                       93 
##     Inh L4-6 PVALB SULF1      Inh L4-6 SST B3GAT2      Inh L4-6 SST GXYLT2 
##                      167                      182                       41 
##      Inh L5-6 GAD1 GLP1R      Inh L5-6 PVALB LGR5     Inh L5-6 SST KLHDC8A 
##                       27                       52                       63 
##    Inh L5-6 SST MIR548F2     Inh L5-6 SST NPM1P10          Inh L5-6 SST TH 
##                       80                       79                       27 
##        Micro L1-3 TYROBP                 no class        Oligo L1-6 OPALIN 
##                       63                      325                      313 
##          OPC L1-6 PDGFRA 
##                      238
cell_data$cell_type = sapply(strsplit(cell_data$cluster, split=" "), "[", 1)
table(cell_data$cell_type)
## 
## Astro  Endo   Exc   Inh Micro    no Oligo   OPC 
##   291     9 10525  4164    63   325   313   238
cell_data$cell_type[which(cell_data$cell_type == "no")] = "unknown"

# Double check samples are correctly sorted
all(colnames(counts) == cell_data$sample_name)
## [1] TRUE
sce = SingleCellExperiment(assays = list(counts = counts), colData = cell_data)
rm(counts,cell_data)

# Import gene info
gene_dat = smart_RT(file.path(work_dir,"human_MTG_2018-06-14_genes-rows.csv"), 
                    sep=',', header=TRUE)
dim(gene_dat)
## [1] 50281     5
gene_dat[1:3,]
##      gene chromosome entrez_id
## 1 3.8-1.2          6    353007
## 2 3.8-1.3          6    353008
## 3 3.8-1.4          6    353009
##                                              gene_name mouse_homologenes
## 1 HLA complex group 26 (non-protein coding) pseudogene                  
## 2 HLA complex group 26 (non-protein coding) pseudogene                  
## 3 HLA complex group 26 (non-protein coding) pseudogene
all(rownames(sce) == as.character(gene_dat$entrez_id))
## [1] TRUE
rowData(sce)  = gene_dat
rownames(sce) = rowData(sce)$gene
sce
## class: SingleCellExperiment 
## dim: 50281 15928 
## metadata(0):
## assays(1): counts
## rownames(50281): 3.8-1.2 3.8-1.3 ... bA255A11.4 bA395L14.12
## rowData names(5): gene chromosome entrez_id gene_name
##   mouse_homologenes
## colnames(15928): F1S4_160106_001_B01 F1S4_160106_001_C01 ...
##   F2S4_170405_060_F01 F2S4_170405_060_H01
## colData names(35): sample_name sample_id ... cluster cell_type
## reducedDimNames(0):
## spikeNames(0):
rm(gene_dat)

# Checking spike ins
rowData(sce)[grep("^ERCC", rowData(sce)$gene),]
## DataFrame with 10 rows and 5 columns
##           gene  chromosome entrez_id
##    <character> <character> <integer>
## 1        ERCC1          19      2067
## 2        ERCC2          19      2068
## 3        ERCC3           2      2071
## 4        ERCC4          16      2072
## 5        ERCC5          13      2073
## 6        ERCC6          10      2074
## 7  ERCC6-PGBD3          10 101243544
## 8       ERCC6L           X     54821
## 9      ERCC6L2           9    375748
## 10       ERCC8           5      1161
##                                               gene_name mouse_homologenes
##                                             <character>       <character>
## 1         excision repair cross-complementation group 1             Ercc1
## 2         excision repair cross-complementation group 2             Ercc2
## 3         excision repair cross-complementation group 3             Ercc3
## 4         excision repair cross-complementation group 4             Ercc4
## 5         excision repair cross-complementation group 5             Ercc5
## 6         excision repair cross-complementation group 6             Ercc6
## 7                               ERCC6-PGBD3 readthrough                  
## 8    excision repair cross-complementation group 6-like            Ercc6l
## 9  excision repair cross-complementation group 6-like 2           Ercc6l2
## 10        excision repair cross-complementation group 8             Ercc8
sce
## class: SingleCellExperiment 
## dim: 50281 15928 
## metadata(0):
## assays(1): counts
## rownames(50281): 3.8-1.2 3.8-1.3 ... bA255A11.4 bA395L14.12
## rowData names(5): gene chromosome entrez_id gene_name
##   mouse_homologenes
## colnames(15928): F1S4_160106_001_B01 F1S4_160106_001_C01 ...
##   F2S4_170405_060_F01 F2S4_170405_060_H01
## colData names(35): sample_name sample_id ... cluster cell_type
## reducedDimNames(0):
## spikeNames(0):

3 Pre-processing: Quality Control, Gene Detection, Normalization

3.0.1 Incorporate Mito and Ribo to calculate QC metrics

Next step we apply QC based on a set of features per cell. The code and filtering below are motivated by the vignette presented here.

sort(names(colData(sce)))
##  [1] "age_days"                      "brain_hemisphere"             
##  [3] "brain_region"                  "brain_subregion"              
##  [5] "cell_type"                     "class"                        
##  [7] "cluster"                       "complexity_cg"                
##  [9] "donor"                         "facs_container"               
## [11] "facs_date"                     "facs_sort_criteria"           
## [13] "genes_detected_cpm_criterion"  "genes_detected_fpkm_criterion"
## [15] "library_prep_avg_size_bp"      "library_prep_set"             
## [17] "organism"                      "percent_aligned_reads_total"  
## [19] "percent_ecoli_reads"           "percent_exon_reads"           
## [21] "percent_intergenic_reads"      "percent_intron_reads"         
## [23] "percent_mt_exon_reads"         "percent_reads_unique"         
## [25] "percent_rrna_reads"            "percent_synth_reads"          
## [27] "rna_amplification_set"         "sample_id"                    
## [29] "sample_name"                   "sample_type"                  
## [31] "seq_batch"                     "seq_name"                     
## [33] "seq_tube"                      "sex"                          
## [35] "total_reads"
sort(names(rowData(sce)))
## [1] "chromosome"        "entrez_id"         "gene"             
## [4] "gene_name"         "mouse_homologenes"
sce = calculateQCMetrics(sce)
## Note that the names of some metrics have changed, see 'Renamed metrics' in ?calculateQCMetrics.
## Old names are currently maintained for back-compatibility, but may be removed in future releases.
sort(names(colData(sce)))
##  [1] "age_days"                       "brain_hemisphere"              
##  [3] "brain_region"                   "brain_subregion"               
##  [5] "cell_type"                      "class"                         
##  [7] "cluster"                        "complexity_cg"                 
##  [9] "donor"                          "facs_container"                
## [11] "facs_date"                      "facs_sort_criteria"            
## [13] "genes_detected_cpm_criterion"   "genes_detected_fpkm_criterion" 
## [15] "is_cell_control"                "library_prep_avg_size_bp"      
## [17] "library_prep_set"               "log10_total_counts"            
## [19] "log10_total_features"           "log10_total_features_by_counts"
## [21] "organism"                       "pct_counts_in_top_100_features"
## [23] "pct_counts_in_top_200_features" "pct_counts_in_top_50_features" 
## [25] "pct_counts_in_top_500_features" "pct_counts_top_100_features"   
## [27] "pct_counts_top_200_features"    "pct_counts_top_50_features"    
## [29] "pct_counts_top_500_features"    "percent_aligned_reads_total"   
## [31] "percent_ecoli_reads"            "percent_exon_reads"            
## [33] "percent_intergenic_reads"       "percent_intron_reads"          
## [35] "percent_mt_exon_reads"          "percent_reads_unique"          
## [37] "percent_rrna_reads"             "percent_synth_reads"           
## [39] "rna_amplification_set"          "sample_id"                     
## [41] "sample_name"                    "sample_type"                   
## [43] "seq_batch"                      "seq_name"                      
## [45] "seq_tube"                       "sex"                           
## [47] "total_counts"                   "total_features"                
## [49] "total_features_by_counts"       "total_reads"
sort(names(rowData(sce)))
##  [1] "chromosome"            "entrez_id"            
##  [3] "gene"                  "gene_name"            
##  [5] "is_feature_control"    "log10_mean_counts"    
##  [7] "log10_total_counts"    "mean_counts"          
##  [9] "mouse_homologenes"     "n_cells_by_counts"    
## [11] "n_cells_counts"        "pct_dropout_by_counts"
## [13] "pct_dropout_counts"    "total_counts"
par(mfrow=c(3,2), mar=c(5, 4, 1, 1), bty="n", cex=0.9)

smart_hist(log10(sce$total_counts),xlab="log10(Library sizes)", main="", 
    breaks=40,ylab="Number of cells")

smart_hist(log10(sce$total_features),xlab="log10(# of expressed genes)", 
    main="", breaks=40,ylab="Number of cells")

smart_hist(sce$percent_rrna_reads, xlab="Ribosome prop. (%)",
    ylab="Number of cells", breaks=40, main="")

smart_hist(sce$percent_mt_exon_reads, xlab="Mitochondrial prop. (%)", 
    ylab="Number of cells", breaks=80, main="")

smart_hist(sce$percent_synth_reads, xlab="Synthetic reads (%)",
    ylab="Number of cells", breaks=40, main="")

smart_hist(sce$percent_reads_unique, xlab="Unique reads (%)", 
    ylab="Number of cells", breaks=80, main="")

par(mfrow=c(3,2), mar=c(5, 4, 1, 1), bty="n", cex=0.9)

plot(log10(sce$total_counts), log10(sce$total_features), 
    xlab="log10(Library sizes)", ylab="log10(# of expressed genes)", 
    pch=20, cex=0.5)

plot(log10(sce$total_counts), sce$percent_synth_reads, 
    xlab="log10(Library sizes)", ylab="synth reads percent (%)",
    pch=20, cex=0.5)

plot(log10(sce$total_counts), sce$percent_reads_unique, 
    xlab="log10(Library sizes)", ylab="unique reads percent (%)",
    pch=20, cex=0.5)

plot(sce$percent_exon_reads, sce$percent_reads_unique, 
    xlab="exon reads percent (%)", ylab="unique reads percent (%)",
    pch=20, cex=0.5)

plot(sce$percent_aligned_reads_total, sce$percent_reads_unique, 
    xlab="aligned reads percent (%)", ylab="unique reads percent (%)",
    pch=20, cex=0.5)

plot(sce$genes_detected_fpkm_criterion, sce$percent_reads_unique, 
    xlab="detected genes (%)", ylab="unique reads percent (%)",
    pch=20, cex=0.5)

# Removing outliers defined as being percent_reads_unique lower than 50% 

keep = sce$percent_reads_unique > 50
table(colData(sce)$cell_type, keep)
##          keep
##           FALSE  TRUE
##   Astro       3   288
##   Endo        0     9
##   Exc        52 10473
##   Inh        13  4151
##   Micro       0    63
##   Oligo       0   313
##   OPC         0   238
##   unknown     2   323

We then subset the sce object to keep high quality samples(cells).

sce = sce[,keep]
dim(sce)
## [1] 50281 15858

3.1 Summarize gene-level information

We keep those genes that are expressed in at least 30 cells, which is roughly 0.2% of the cells. This match to the goal of this study, to detect cell types as rare as 0.2% of all the cells.

rowData(sce)[1:2,]
## DataFrame with 2 rows and 14 columns
##          gene  chromosome entrez_id
##   <character> <character> <integer>
## 1     3.8-1.2           6    353007
## 2     3.8-1.3           6    353008
##                                              gene_name mouse_homologenes
##                                            <character>       <character>
## 1 HLA complex group 26 (non-protein coding) pseudogene                  
## 2 HLA complex group 26 (non-protein coding) pseudogene                  
##   is_feature_control         mean_counts   log10_mean_counts
##            <logical>           <numeric>           <numeric>
## 1              FALSE 0.00878955298844802 0.00380057604028069
## 2              FALSE  0.0573204419889503   0.024206628837133
##   n_cells_by_counts pct_dropout_by_counts total_counts log10_total_counts
##           <integer>             <numeric>    <integer>          <numeric>
## 1                 7      99.9560522350578          140   2.14921911265538
## 2                21      99.8681567051733          913   2.96094619573383
##   n_cells_counts pct_dropout_counts
##        <integer>          <numeric>
## 1              7   99.9560522350578
## 2             21   99.8681567051733
summary(rowData(sce)$mean_counts)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##      0.00      0.02      0.31     15.13      5.56 101644.02
table(rowData(sce)$mean_counts == 0)
## 
## FALSE  TRUE 
## 48278  2003
summary(rowData(sce)$n_cells_counts)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0      30     244    1744    1893   15928
table(colData(sce)$cell_type)
## 
##   Astro    Endo     Exc     Inh   Micro   Oligo     OPC unknown 
##     288       9   10473    4151      63     313     238     323
par(mfrow=c(2,2), mar=c(5,4,1,1))
smart_hist(log10(rowData(sce)$mean_counts+1e-6),main="",
    breaks=40, xlab="log10(ave # of UMI + 1e-6)")
smart_hist(log10(rowData(sce)$n_cells_counts+1),main="",
    breaks=40, xlab="log10(# of expressed cells + 1)")
smoothScatter(log10(rowData(sce)$mean_counts+1e-6),
    log10(rowData(sce)$n_cells_counts + 1),
    xlab="log10(ave # of UMI + 1e-6)",
    ylab="log10(# of expressed cells + 1)")

tb1 = table(rowData(sce)$n_cells_counts)
tb1[1:11]
## 
##    0    1    2    3    4    5    6    7    8    9   10 
## 2003  567  555  559  662  574  539  562  494  441  445
ncol(sce)*0.002
## [1] 31.716
min_detect_min_sample = rowSums(counts(sce) > 0) > 30
table(min_detect_min_sample)
## min_detect_min_sample
## FALSE  TRUE 
## 12624 37657
min_mean_counts05 = rowData(sce)$mean_counts > 0.5
table(min_mean_counts05)
## min_mean_counts05
## FALSE  TRUE 
## 27404 22877
table(min_detect_min_sample,min_mean_counts05)
##                      min_mean_counts05
## min_detect_min_sample FALSE  TRUE
##                 FALSE 12624     0
##                 TRUE  14780 22877
sce = sce[which(min_detect_min_sample),]
dim(sce)
## [1] 37657 15858
# Next we check those highly expressed genes 
par(mfrow=c(1,2),mar=c(5,8,1,1))

od1 = order(rowData(sce)$mean_counts, decreasing = TRUE)
barplot(rowData(sce)$mean_counts[od1[20:1]], las=1, 
    names.arg=rowData(sce)$gene[od1[20:1]], 
    horiz=TRUE, cex.names=1, cex.axis=0.7, 
    xlab="ave # of UMI")
barplot(log10(rowData(sce)$mean_counts[od1[20:1]]), las=1, 
    names.arg=rowData(sce)$gene[od1[20:1]], 
    horiz=TRUE, cex.names=1, cex.axis=0.7, 
    xlab="log10(ave # of UMI)")

saveRDS(sce,file.path(work_dir,"post_gene_filter.rds"))
gc()
##             used   (Mb) gc trigger   (Mb) limit (Mb)   max used   (Mb)
## Ncells   5445189  290.9    8464344  452.1         NA    6988159  373.3
## Vcells 349874108 2669.4 1198985994 9147.6      16384 1250508184 9540.7
# To load image
# sce = readRDS(file.path(work_dir,"post_gene_filter.rds"))

3.2 Normalization

A simple solution for normalization and stablizing expression varaince across genes is to tranform the count data by log(count/size.factor + 1). One may calcualte size.factor per cell as the total number of UMIs, and this assumes the total expression are the same across all the cells. However, the total expression of each cell may vary with respect to cell type and/or cell size, and the computeSumFactors function in R package scran provides a more sophisicated way to calcualte size.factor to allow such variaation across cells (Lun, Bach, and Marioni 2016). computeSumFactors can use initial clustering of cells to normalize expression within and beetween clusters. Within a cluster, it estimates the size factor for many groups of cells so that there are more groups than cells, and then it can calcualte the size factor per cell using a lienar deconvolution system.

This mehtod will fail (i.e., giving negative size factors) if there are too many zeros in the count data. Therefore it is necessesary to remove “genes with a library size-adjusted average count below a specified threshold” using the parameter min.mean. See here for more explanations.

min_mean = 1

date()
## [1] "Tue Nov 13 21:22:33 2018"
clusters = quickCluster(sce, min.mean=min_mean, method="igraph")
table(clusters)
## clusters
##    1    2    3    4    5    6    7    8    9 
## 1422  254  236 2335  791  351 3481 4356 2632
date()
## [1] "Tue Nov 13 21:25:03 2018"
sce = computeSumFactors(sce, cluster=clusters, min.mean=min_mean)
date()
## [1] "Tue Nov 13 21:28:35 2018"
summary(sizeFactors(sce))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.04694 0.81167 0.99403 1.00000 1.17318 7.09965

As shown in the following plot, the final size factor estimation is indeed highly correlated with the naive definition by total count.

par(mfrow=c(1,1), mar=c(5,4,2,1), bty="n")
plot(sce$total_counts, sizeFactors(sce), log="xy", 
    xlab="total counts", ylab="size factors", 
    cex=0.3, pch=20, col=rgb(0.1,0.2,0.7,0.3))

Finally, the command normalize(sce) adds the normalized expression into the variable sce.

sce = normalize(sce)

3.3 Dimension reduction

For dimension reduction, such as calculating PCA or performing TSNE, we should start by identifying a subset of genes with high level of biological signal relative to background (technical) noise. We start by examinng mean-variance relationship.

Since the information of individual spikeIns have been removed from this MTG dataset, we implemented the code below similar to here.

new_trend = makeTechTrend(x=sce)
fit = trendVar(sce, use.spikes=FALSE, loess.args=list(span=0.05))

par(mfrow=c(1,1), mar=c(5,4,2,1), bty="n")
plot(fit$mean, fit$vars, pch=20, col=rgb(0.1,0.2,0.7,0.6), 
    xlab="log(mean)", ylab="var")
curve(new_trend(x), col="red", lwd=2, add=TRUE)
curve(fit$trend(x), col="orange", lwd=2, add=TRUE)
legend("topright", legend=c("Poisson noise", "observed trend"), 
    lty=1, lwd=2, col=c("red", "orange"), bty="n")

The above function makeTechTrend() assumes a Poisson model and from the above plot, it is clearly not a suitable fit. So we will keep fit equal to the loess fit from trendVar() rather than setting it equal to the output from makeTechTrend().

In the following code, we used the decomposeVar function from R/cran to estimate the technical/biological component of variance of each gene.
> The technical component of the variance for each gene is determined by interpolating the fitted trend in fit at the mean log-count for that gene. This represents variance due to sequencing noise, variability in capture efficiency, etc. The biological component is determined by subtracting the technical component from the total variance. Highly variable genes (HVGs) can be identified as those with large biological components

dec     = decomposeVar(fit=fit)
top_dec = dec[order(dec$bio,decreasing=TRUE),]
top_dec[1:10,]
## DataFrame with 10 rows and 6 columns
##                        mean            total              bio
##                   <numeric>        <numeric>        <numeric>
## ENC1       6.10060913082233 17.6217621167186 8.22282120411021
## GAD1       2.61087351365303 17.4907256501264 7.93332359845671
## SPARCL1    7.18094050563783 14.9473271140345 7.23123060272616
## SLC6A1     2.97822510661336 17.3006427005694  7.1197826587595
## CHN1       7.92151177465267 13.7100779096596 6.96152127819161
## CXCL14     1.75174064365736 14.2816116216346 6.87885416656006
## ARPP21     6.91867962871774 14.8393276878734 6.75313018156838
## CCK        5.70614308125613 16.4894016630659  6.5322321783333
## SYNPR      2.90952983466325 16.4521427373326  6.3926064884151
## KCNIP4-IT1 5.63033910487119 16.3389288870539  6.2828917193941
##                        tech   p.value       FDR
##                   <numeric> <numeric> <numeric>
## ENC1       9.39894091260838         0         0
## GAD1       9.55740205166965         0         0
## SPARCL1    7.71609651130831         0         0
## SLC6A1     10.1808600418099         0         0
## CHN1         6.748556631468         0         0
## CXCL14      7.4027574550745         0         0
## ARPP21     8.08619750630504         0         0
## CCK        9.95716948473256         0         0
## SYNPR      10.0595362489175         0         0
## KCNIP4-IT1 10.0560371676599         0         0
par(mfrow=c(2,2))
smart_hist(dec$bio,breaks=30,xlab="Biological Variance")
smart_hist(dec$FDR,breaks=30,xlab="FDR")
smart_hist(log10(dec$FDR + 1e-6),breaks=30,xlab="log10(FDR + 1e-6)")
par(mfrow=c(1,1))

wtop = match(rownames(top_dec)[1:10], rownames(sce))
sce_sub = sce[wtop,]
dim(sce_sub)
## [1]    10 15858
plotExpression(sce_sub, features=1:10)

Here we only select approximately the top a few thousands highly variable genes based on tuning thresholds on dec$bio and dec$FDR from the earlier variance decomposition.

summary(dec$bio)
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -10.011810  -0.073876  -0.005503  -0.006946   0.083358   8.222821
summary(dec$FDR)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  1.0000  0.5726  1.0000  1.0000
dec1 = dec
dec1$bio[which(dec1$bio < 1e-4)]  = 1e-4
dec1$FDR[which(dec1$FDR < 1e-50)] = 1e-50

par(mfrow=c(2,2))
smart_hist(log10(dec1$bio), breaks=100, xlab="log10(bio)")
smart_hist(log10(dec1$FDR), breaks=100, main="", xlab="log10(FDR)")
plot(log10(dec1$bio), log10(dec1$FDR), xlab="log10(bio)", ylab="log10(FDR)", 
     pch=20, cex=0.5)
par(mfrow=c(1,1))

FDR_thres = 1e-20 
bio_thres = 0.1
FDR_keep = dec$FDR < FDR_thres
bio_keep = dec$bio > bio_thres
table(FDR_keep,bio_keep)
##         bio_keep
## FDR_keep FALSE  TRUE
##    FALSE 26038  4157
##    TRUE   2714  4748
w2kp = which(dec1$FDR < FDR_thres & dec1$bio > bio_thres)
summary(rowData(sce)$n_cells_counts[w2kp])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   145.0   834.8  2203.0  3635.8  5434.2 15722.0
# Subsetting highly variable genes for subsequent PCA and TSNE
sce_hvg = sce[w2kp,]
sce_hvg
## class: SingleCellExperiment 
## dim: 4748 15858 
## metadata(1): log.exprs.offset
## assays(2): counts logcounts
## rownames(4748): A2M-AS1 AADAT ... ZXDA ZXDB
## rowData names(14): gene chromosome ... n_cells_counts
##   pct_dropout_counts
## colnames(15858): F1S4_160106_001_B01 F1S4_160106_001_C01 ...
##   F2S4_170405_060_F01 F2S4_170405_060_H01
## colData names(50): sample_name sample_id ...
##   pct_counts_top_200_features pct_counts_top_500_features
## reducedDimNames(0):
## spikeNames(0):

Next we use the selected genes for PCA and use top 50 PCs for TSNE plot.

edat = t(as.matrix(logcounts(sce_hvg)))
edat = scale(edat)
dim(edat)
## [1] 15858  4748
edat[1:2,1:3]
##                        A2M-AS1      AADAT      AAED1
## F1S4_160106_001_B01 -0.1914142 -0.5988004 -0.2027572
## F1S4_160106_001_C01 -0.1914142  1.2330208 -0.2027572
ppk = propack.svd(edat,neig=50)
pca = t(ppk$d*t(ppk$u))
dim(pca)
## [1] 15858    50
df_hvg = smart_df(pca)
rownames(df_hvg) = NULL
names(df_hvg) = paste0("PC",seq(ncol(df_hvg)))
df_hvg        = smart_df(sample_name = colnames(sce_hvg), df_hvg)

all_vars = c("log10_total_features", "sex", "brain_hemisphere",
    "brain_subregion", "facs_sort_criteria", "class", "cluster",
    "cell_type")

df_hvg = cbind(df_hvg, colData(sce_hvg)[,all_vars])
dim(df_hvg)
## [1] 15858    59
df_hvg[1:2,c(1:3,51:ncol(df_hvg))]
## DataFrame with 2 rows and 12 columns
##           sample_name               PC1               PC2             PC50
##           <character>         <numeric>         <numeric>        <numeric>
## 1 F1S4_160106_001_B01  1.16820625205664 -17.1162217744368 3.16437478259475
## 2 F1S4_160106_001_C01 -20.8281706930393 -1.15543466301022 2.24015151063231
##   log10_total_features         sex brain_hemisphere brain_subregion
##              <numeric> <character>      <character>     <character>
## 1     3.72065535655172           M                L              L5
## 2     3.91634865227546           M                L              L5
##   facs_sort_criteria         class             cluster   cell_type
##          <character>   <character>         <character> <character>
## 1      NeuN-positive     GABAergic Inh L4-6 SST B3GAT2         Inh
## 2      NeuN-positive Glutamatergic Exc L5-6 RORB TTC12         Exc
set.seed(100)
date()
## [1] "Tue Nov 13 21:33:26 2018"
tsne = Rtsne(pca, pca = FALSE)
date()
## [1] "Tue Nov 13 21:35:51 2018"
df_tsne = smart_df(tsne$Y)
names(df_tsne) = paste0("HVG_TSNE",seq(ncol(tsne$Y)))
dim(df_tsne)
## [1] 15858     2
df_tsne[1:2,]
##    HVG_TSNE1 HVG_TSNE2
## 1 -17.128998  48.11234
## 2  -7.126374 -12.36402
df_hvg = smart_df(df_hvg, df_tsne)

for(one_var in all_vars){
  cat(paste0(one_var,"\n"))
    print(ggplot_custom(DATA = df_hvg, X = "HVG_TSNE1", Y = "HVG_TSNE2", 
                        COL = one_var))
    # wait the graph to show up
    Sys.sleep(5)
}
## log10_total_features

## sex

## brain_hemisphere

## brain_subregion

## facs_sort_criteria

## class

## cluster

## cell_type

reducedDims(sce_hvg) = SimpleList(PCA=pca, TSNE=tsne$Y)
sce_hvg
## class: SingleCellExperiment 
## dim: 4748 15858 
## metadata(1): log.exprs.offset
## assays(2): counts logcounts
## rownames(4748): A2M-AS1 AADAT ... ZXDA ZXDB
## rowData names(14): gene chromosome ... n_cells_counts
##   pct_dropout_counts
## colnames(15858): F1S4_160106_001_B01 F1S4_160106_001_C01 ...
##   F2S4_170405_060_F01 F2S4_170405_060_H01
## colData names(50): sample_name sample_id ...
##   pct_counts_top_200_features pct_counts_top_500_features
## reducedDimNames(2): PCA TSNE
## spikeNames(0):
saveRDS(list(sce_hvg=sce_hvg, df_hvg=df_hvg), 
        file.path(work_dir, "post_redDim_HVG.rds"))

rm(edat)
gc()
##              used    (Mb) gc trigger    (Mb) limit (Mb)   max used    (Mb)
## Ncells    6358274   339.6   11080605   591.8         NA   10071397   537.9
## Vcells 1665537535 12707.1 1961982633 14968.8      16384 1960338556 14956.2

3.4 Clustering

3.4.1 Kmeans

We first try clustering using kmeans.

all_num_clust = c(5:15)
df_hvg = df_hvg[,!grepl("^KM_",names(df_hvg))]

rd.idx = adj.rd.idx = matrix(NA, nrow=length(all_num_clust), ncol=2)
k = 0

for(num_clust in all_num_clust){
  k = k + 1
  cat(paste0("KM with ",num_clust," clusters.\n"))
  
  kmeans_out = kmeans(reducedDim(sce_hvg,"PCA"), centers = num_clust,
                      iter.max = 5e2, algorithm = "MacQueen")
  
  km.label = paste0("KM_",num_clust)
  df_hvg[[km.label]] = as.factor(kmeans_out$cluster)
  
  print(table(df_hvg$cell_type, kmeans_out$cluster))
  
  print(ggplot_custom(DATA = df_hvg, X = "HVG_TSNE1", Y = "HVG_TSNE2", 
                      COL = paste0("KM_",num_clust)))
}
## KM with 5 clusters.
##          
##              1    2    3    4    5
##   Astro    288    0    0    0    0
##   Endo       8    0    0    1    0
##   Exc       15    2 5318    8 5130
##   Inh        4 4125    0   13    9
##   Micro     62    0    0    1    0
##   Oligo      4    0    0  309    0
##   OPC      237    0    0    1    0
##   unknown   36   42  154   17   74

## KM with 6 clusters.
##          
##              1    2    3    4    5    6
##   Astro      0    0    0    0  288    0
##   Endo       1    0    0    0    8    0
##   Exc        9 1040    2 4916   15 4491
##   Inh       13    2 4126    0    4    6
##   Micro      1    0    0    0   62    0
##   Oligo    309    0    0    0    4    0
##   OPC        1    0    0    0  237    0
##   unknown   17    8   42  147   36   73

## KM with 7 clusters.
##          
##              1    2    3    4    5    6    7
##   Astro      0    0  288    0    0    0    0
##   Endo       0    0    9    0    0    0    0
##   Exc        0    1   21 2053 2635 3108 2655
##   Inh     2308 1822   13    1    1    6    0
##   Micro      0    0   63    0    0    0    0
##   Oligo      0    0  313    0    0    0    0
##   OPC        0    0  238    0    0    0    0
##   unknown   29   14   48   17  113   60   42

## KM with 8 clusters.
##          
##              1    2    3    4    5    6    7    8
##   Astro      0    0    0    0    0    0    0  288
##   Endo       0    0    0    0    0    0    0    9
##   Exc        0 2432 2108    1 3052 2859    0   21
##   Inh      895    1    4 1824    1    0 1413   13
##   Micro      0    0    0    0    0    0    0   63
##   Oligo      0    0    0    0    0    0    0  313
##   OPC        0    0    0    0    0    0    0  238
##   unknown    4   63   42   15   39   86   25   49

## KM with 9 clusters.
##          
##              1    2    3    4    5    6    7    8    9
##   Astro      0    0  288    0    0    0    0    0    0
##   Endo       0    0    8    0    0    0    0    1    0
##   Exc     2011    1   13    0 2536 1760 2077    8 2067
##   Inh        0 1821    4 2304    4    0    2   12    4
##   Micro      0    0   62    0    0    0    0    1    0
##   Oligo      0    0    4    0    0    0    0  309    0
##   OPC        0    0  237    0    0    0    0    1    0
##   unknown   44   12   34   27  101   28   19   17   41

## KM with 10 clusters.
##          
##              1    2    3    4    5    6    7    8    9   10
##   Astro      0    0    0  288    0    0    0    0    0    0
##   Endo       0    0    0    9    0    0    0    0    0    0
##   Exc        1 1705 2348   21  442 2011    0  623 2039 1283
##   Inh     3046    1    0   13    0    2 1084    1    0    4
##   Micro      0    0    0   63    0    0    0    0    0    0
##   Oligo      0    0    0  313    0    0    0    0    0    0
##   OPC        0    0    0  238    0    0    0    0    0    0
##   unknown   33   31   89   48    3   18   10    8   52   31

## KM with 11 clusters.
##          
##              1    2    3    4    5    6    7    8    9   10   11
##   Astro      0    0    0    0    0    0    0  287    0    1    0
##   Endo       0    0    6    0    0    0    0    0    0    3    0
##   Exc     3192    0    8    0    1  269 2621   12 2861  441 1068
##   Inh        2  818   14 1271 1089    0    1    1    0  955    0
##   Micro      0    0   58    0    0    0    0    1    0    4    0
##   Oligo      0    0  311    0    0    0    0    2    0    0    0
##   OPC        0    0  233    0    0    0    0    4    0    1    0
##   unknown   56    1   19   16    4   24   39   24   75   57    8

## KM with 12 clusters.
##          
##              1    2    3    4    5    6    7    8    9   10   11   12
##   Astro      0    0    1    0    0    0   66    0    0    0  221    0
##   Endo       0    0    3    0    0    6    0    0    0    0    0    0
##   Exc        1 1664  439  431 2900    8    1    0 1969  351   11 2698
##   Inh     1126    1 1044    0    1   14    0 1963    0    0    1    1
##   Micro      0    0    4    0    0   58    0    0    0    0    1    0
##   Oligo      0    0    0    0    0  311    0    0    0    0    2    0
##   OPC        0    0    1    0    0  233    1    0    0    0    3    0
##   unknown    4   36   59    3   40   19    1   16   59    1   23   62

## KM with 13 clusters.
##          
##              1    2    3    4    5    6    7    8    9   10   11   12   13
##   Astro      0    0    0    0    0    0    0    1    0    0    0    0  287
##   Endo       0    0    0    0    0    0    0    8    0    0    0    1    0
##   Exc      763 1582 1315 1619    1 1659 1002   10 1145    0 1363    2   12
##   Inh        1    2    0    0  876    0    0   14    0 2092    0 1165    1
##   Micro      0    0    0    0    0    0    0   62    0    0    0    0    1
##   Oligo      0    0    0    0    0    0    0  311    0    0    0    0    2
##   OPC        0    0    0    0    0    0    0  233    0    0    1    0    4
##   unknown   21   14   23   28    4   31    8   22   20   21   83   25   23

## KM with 14 clusters.
##          
##              1    2    3    4    5    6    7    8    9   10   11   12   13
##   Astro      0    0    0    0    0    0    0    0    0    0    0    0  288
##   Endo       0    0    0    0    0    0    0    0    0    0    0    0    9
##   Exc     1267 1378    1 1482 1044 1198 1531  259    0 1042 1252    0   19
##   Inh        1    0  848    0    0    0    0    0 1345    1    2 1056   12
##   Micro      0    0    0    0    0    0    0    0    0    0    0    0   63
##   Oligo      0    0    0    0    0    0    0    0    0    0    0    0  313
##   OPC        1    0    0    0    0    0    0    0    0    0    0    0  237
##   unknown   71   26    5   18    7   27   26   22   22    9   26   16   44
##          
##             14
##   Astro      0
##   Endo       0
##   Exc        0
##   Inh      886
##   Micro      0
##   Oligo      0
##   OPC        0
##   unknown    4

## KM with 15 clusters.
##          
##              1    2    3    4    5    6    7    8    9   10   11   12   13
##   Astro      0    0    0    0  287    0    0    0    0    0    0    1    0
##   Endo       0    0    1    0    0    0    0    0    0    0    0    8    0
##   Exc        0 1867    8  260   12 1494 1483 1552    1    2    0   16  326
##   Inh     1279    0   11    0    1    0    0    1 1210    4  835  807    1
##   Micro      0    0    0    0    0    0    0    0    0   62    0    1    0
##   Oligo      0    0  310    0    2    0    0    0    0    1    0    0    0
##   OPC        0    0    1    0    4    0    0    0    0    0    0  233    0
##   unknown   15   24   12   22   21   73   17   15    4    9    3   35    7
##          
##             14   15
##   Astro      0    0
##   Endo       0    0
##   Exc     1798 1654
##   Inh        1    1
##   Micro      0    0
##   Oligo      0    0
##   OPC        0    0
##   unknown   38   28

3.4.2 SC3

Next we try to clustering cell using SC3. Code used here is based on this link. Since SC3 is computationally much more expensive, we only try three number of clusters, 5, 10, and 15.

rowData(sce_hvg)$feature_symbol = rowData(sce_hvg)$gene

all_ks = c(5,10,15)

date()
## [1] "Tue Nov 13 21:37:25 2018"
sce_hvg = sc3(sce_hvg, ks = all_ks, biology = TRUE,
    n_cores = num_cores, rand_seed = 100, svm_num_cells = 2000)
## Setting SC3 parameters...
## Your dataset contains more than 2000 cells. Adjusting the nstart parameter of kmeans to 50 for faster performance...
## Defining training cells for SVM using svm_num_cells parameter...
## Calculating distances between the cells...
## Performing transformations and calculating eigenvectors...
## Performing k-means clustering...
## Calculating consensus matrix...
## Calculating biology...
warnings()
date()
## [1] "Tue Nov 13 22:29:02 2018"
# Run SVM and predict labels of all other cells
date()
## [1] "Tue Nov 13 22:29:02 2018"
sce_hvg = sc3_run_svm(sce_hvg, ks = all_ks)
date()
## [1] "Tue Nov 13 22:34:42 2018"
saveRDS(list(sce_hvg=sce_hvg, all_ks=all_ks), 
        file.path(work_dir, "post_HVG_sc3.rds"))
gc()
##              used    (Mb) gc trigger    (Mb) limit (Mb)   max used    (Mb)
## Ncells    5594786   298.8   11080605   591.8         NA   11080605   591.8
## Vcells 1676410862 12790.0 1964023166 14984.4      16384 1962345449 14971.6

Next we compare the clustering results from SC3 and cell types.

for(one_ks in all_ks){
  sc3.label = paste0("sc3_",one_ks,"_clusters")
    df_hvg[[sc3.label]] = as.factor(colData(sce_hvg)[,sc3.label])

    print(table(df_hvg$cell_type, df_hvg[[sc3.label]]))

    print(ggplot_custom(DATA = df_hvg, X = "HVG_TSNE1", Y = "HVG_TSNE2", 
                        COL = sc3.label))
}
##          
##              1    2    3    4    5
##   Astro      0    0  288    0    0
##   Endo       0    0    9    0    0
##   Exc     6774 3690    8    0    1
##   Inh        1    2    1 1685 2462
##   Micro      0    0   63    0    0
##   Oligo      3    0  310    0    0
##   OPC        1    0  237    0    0
##   unknown  175   57   43   27   21

##          
##              1    2    3    4    5    6    7    8    9   10
##   Astro      0    0    0    0   41  247    0    0    0    0
##   Endo       0    0    0    0    3    6    0    0    0    0
##   Exc     3754  999    0    1    1  201 1580  306 1746 1885
##   Inh        2  227 1697 1680  541    3    0    0    1    0
##   Micro      0    0    0    0   10   53    0    0    0    0
##   Oligo      0    0    0    0   43  269    0    0    1    0
##   OPC        0    0    0    0   32  206    0    0    0    0
##   unknown   60   16   25   15    7   45   62   11   46   36

##          
##              1    2    3    4    5    6    7    8    9   10   11   12   13
##   Astro      0    0    0    0    0    0  247    0    0    0    0    0    0
##   Endo       0    0    0    0    0    0    6    0    0    0    0    0    0
##   Exc     1807  221  711  253   11  490  200 1586    0 1777  105 1673 1589
##   Inh        0  736    0  533  680    1    5  136  126  101    0   85  103
##   Micro      0    0    0    0    0    0   53    0    0    0    0    0    0
##   Oligo      1    0    0    0    0    0  269    0    0    0    0    0    0
##   OPC        0    0    0    0    0    0  206    0    0    0    0    0    0
##   unknown   50    8    4    3   14   17   45   38    1   14    0   37   63
##          
##             14   15
##   Astro      0   41
##   Endo       0    3
##   Exc       49    1
##   Inh      853  792
##   Micro      0   10
##   Oligo      0   43
##   OPC        0   32
##   unknown   13   16

all_vars = c("brain_subregion", "class", "cell_type")

for(one_ks in all_ks){
  gc()

    # one_ks = all_ks[1]
    cat(paste0("Num Clusters = ",one_ks,"\n"))
    
  sc3.label = paste0("sc3_",one_ks,"_clusters")
    sc3.outlier = paste0("sc3_",one_ks,"_log2_outlier_score")
    
    # seems this consensus plot use too much memory 
    # sc3_plot_consensus(sce_hvg, k = one_ks, 
    #                    show_pdata = c(all_vars, sc3.label, sc3.outlier))

    sc3_plot_markers(sce_hvg, k = one_ks, 
                     show_pdata = c(all_vars, sc3.label, sc3.outlier))
}
## Num Clusters = 5

## Num Clusters = 10

## Num Clusters = 15

Finally we save the sce object, sce_hvg object, and the clustering results.

saveRDS(sce, file.path(work_dir, "final_sce.rds"))
saveRDS(sce_hvg, file.path(work_dir, "final_sce_hvg.rds"))
saveRDS(df_hvg,  file.path(work_dir, "final_hvg_clust.rds"))

4 Session information

sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS  10.14
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] SC3_1.8.0                   Rtsne_0.13                 
##  [3] svd_0.4.1                   data.table_1.11.4          
##  [5] limma_3.36.5                scran_1.8.4                
##  [7] scater_1.8.4                ggplot2_3.0.0              
##  [9] SingleCellExperiment_1.2.0  SummarizedExperiment_1.10.1
## [11] DelayedArray_0.6.4          BiocParallel_1.14.2        
## [13] matrixStats_0.54.0          Biobase_2.40.0             
## [15] GenomicRanges_1.32.6        GenomeInfoDb_1.16.0        
## [17] IRanges_2.14.10             S4Vectors_0.18.3           
## [19] BiocGenerics_0.26.0        
## 
## loaded via a namespace (and not attached):
##  [1] ggbeeswarm_0.6.0         colorspace_1.3-2        
##  [3] rjson_0.2.20             class_7.3-14            
##  [5] dynamicTreeCut_1.63-1    rprojroot_1.3-2         
##  [7] XVector_0.20.0           DT_0.4                  
##  [9] mvtnorm_1.0-8            codetools_0.2-15        
## [11] tximport_1.8.0           doParallel_1.0.11       
## [13] robustbase_0.93-2        knitr_1.20              
## [15] cluster_2.0.7-1          pheatmap_1.0.10         
## [17] shinydashboard_0.7.0     shiny_1.1.0             
## [19] rrcov_1.4-4              compiler_3.5.0          
## [21] backports_1.1.2          assertthat_0.2.0        
## [23] Matrix_1.2-14            lazyeval_0.2.1          
## [25] later_0.7.3              htmltools_0.3.6         
## [27] tools_3.5.0              bindrcpp_0.2.2          
## [29] igraph_1.2.2             gtable_0.2.0            
## [31] glue_1.3.0               GenomeInfoDbData_1.1.0  
## [33] reshape2_1.4.3           dplyr_0.7.6             
## [35] doRNG_1.7.1              Rcpp_0.12.18            
## [37] gdata_2.18.0             iterators_1.0.10        
## [39] DelayedMatrixStats_1.2.0 stringr_1.3.1           
## [41] mime_0.5                 irlba_2.3.2             
## [43] rngtools_1.3.1           gtools_3.8.1            
## [45] WriteXLS_4.0.0           statmod_1.4.30          
## [47] edgeR_3.22.3             DEoptimR_1.0-8          
## [49] zlibbioc_1.26.0          scales_1.0.0            
## [51] promises_1.0.1           rhdf5_2.24.0            
## [53] RColorBrewer_1.1-2       yaml_2.2.0              
## [55] gridExtra_2.3            pkgmaker_0.27           
## [57] stringi_1.2.4            pcaPP_1.9-73            
## [59] foreach_1.4.4            e1071_1.7-0             
## [61] caTools_1.17.1.1         bibtex_0.4.2            
## [63] rlang_0.2.1              pkgconfig_2.0.1         
## [65] bitops_1.0-6             evaluate_0.11           
## [67] lattice_0.20-35          ROCR_1.0-7              
## [69] purrr_0.2.5              Rhdf5lib_1.2.1          
## [71] bindr_0.1.1              htmlwidgets_1.2         
## [73] labeling_0.3             cowplot_0.9.3           
## [75] tidyselect_0.2.4         plyr_1.8.4              
## [77] magrittr_1.5             R6_2.2.2                
## [79] gplots_3.0.1             pillar_1.3.0            
## [81] withr_2.1.2              RCurl_1.95-4.11         
## [83] tibble_1.4.2             crayon_1.3.4            
## [85] KernSmooth_2.23-15       rmarkdown_1.10          
## [87] viridis_0.5.1            locfit_1.5-9.1          
## [89] grid_3.5.0               FNN_1.1.2.1             
## [91] digest_0.6.15            xtable_1.8-2            
## [93] httpuv_1.4.5             munsell_0.5.0           
## [95] beeswarm_0.2.3           registry_0.5            
## [97] viridisLite_0.3.0        vipor_0.4.5

Reference

Lun, Aaron TL, Karsten Bach, and John C Marioni. 2016. “Pooling Across Cells to Normalize Single-Cell Rna Sequencing Data with Many Zero Counts.” Genome Biology 17 (1). BioMed Central: 75.